library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(readr)
library(knitr)
library(stringr)
# Load the raw inspection export. No column types are supplied here, so readr
# determines them from the file contents.
data <- read_csv(file = "Manhattan_Restaurant_Inspection_Results.csv")
## Rows: 94616 Columns: 27
## ── Column specification ─────────────────────────────────────
## Delimiter: ","
## chr (17): DBA, BORO, BUILDING, STREET, CUISINE DESCRIPTION, INSPECTION DATE,...
## dbl (9): CAMIS, ZIPCODE, PHONE, SCORE, Latitude, Longitude, Community Board...
## lgl (1): Location Point1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Reference table mapping each letter grade to its inspection-score range.
# check.names = FALSE keeps the "Score Range" header verbatim; with the default
# (TRUE) data.frame() rewrites non-syntactic names, yielding "Score.Range".
grade_score_table <- data.frame(
  Grade = c("A", "B", "C"),
  `Score Range` = c("< 14", "14 - 27", "28 or more"),
  check.names = FALSE
)

# Render the table; the column headers are taken from the data frame itself.
kable(
  grade_score_table,
  caption = "NYC Restaurant Inspection Grade Score Ranges"
)
NYC Restaurant Inspection Grade Score Ranges
| Grade | Score Range |
|-------|-------------|
| A | < 14 |
| B | 14 - 27 |
| C | 28 or more |
# Reduce the inspection data to cuisine and score, coerce the score to numeric,
# and drop every row where either value is missing.
filtered_data <- data %>%
  select(`CUISINE DESCRIPTION`, SCORE) %>%
  mutate(SCORE = as.numeric(SCORE)) %>%
  filter(!(is.na(`CUISINE DESCRIPTION`) | is.na(SCORE)))
# Mean inspection score per cuisine, sorted from highest to lowest mean, keeping
# the first 35 rows. Per the grade ranges shown in this report, a higher score
# corresponds to a worse grade, so "top" here means highest-scoring.
top_cuisines <- filtered_data %>%
  group_by(`CUISINE DESCRIPTION`) %>%
  summarise(avg_score = mean(SCORE, na.rm = TRUE)) %>%
  arrange(desc(avg_score)) %>%
  slice(seq_len(35))
# Keep only the score rows belonging to the ranked cuisines, then turn the
# cuisine column into a factor whose level order follows the ranking in
# top_cuisines (so plots list cuisines in that order).
top_cuisine_data <- filtered_data %>%
  filter(`CUISINE DESCRIPTION` %in% top_cuisines$`CUISINE DESCRIPTION`) %>%
  mutate(
    `CUISINE DESCRIPTION` = factor(
      `CUISINE DESCRIPTION`,
      levels = top_cuisines$`CUISINE DESCRIPTION`
    )
  )
# Bar chart of the mean inspection score per ranked cuisine; bars are ordered
# from the highest mean to the lowest via reorder() on the negated mean.
ggplot(
  data = top_cuisines,
  mapping = aes(
    x = reorder(`CUISINE DESCRIPTION`, -avg_score),
    y = avg_score
  )
) +
  # geom_col() draws bar heights directly from the y values.
  geom_col(fill = "orange", color = "red") +
  labs(
    x = "Cuisine Description",
    y = "Average Score",
    title = "Top 35 Cuisines Types by Average Inspection Scores"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 8),
    # Tilt the cuisine labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Box plot of the individual inspection scores for each ranked cuisine; the
# x-axis order comes from the factor levels set on top_cuisine_data.
ggplot(
  data = top_cuisine_data,
  mapping = aes(x = `CUISINE DESCRIPTION`, y = SCORE)
) +
  geom_boxplot(fill = "orange", color = "red") +
  labs(
    x = "Cuisine Description",
    y = "Inspection Score",
    title = "Inspection Score Distribution for Top 35 Cuisines"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.text = element_text(size = 8),
    # Tilt the cuisine labels so they do not overlap.
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Point data for the map: rows that have a score and both coordinates, plus an
# HTML hover label made of the restaurant name, cuisine, and score.
geo_coord <- data %>%
  filter(
    !is.na(SCORE),
    !is.na(Latitude),
    !is.na(Longitude)
  ) %>%
  mutate(
    SCORE = as.numeric(SCORE),
    # paste0() is used because the label literals already end in a space;
    # paste() would insert its default " " separator as well and produce
    # "Cuisine:  <name>" with a doubled space.
    # NOTE(review): str_c() yields NA when DBA is NA -- confirm DBA is always
    # populated for rows that reach this point.
    info = str_c(
      DBA,
      paste0("Cuisine: ", `CUISINE DESCRIPTION`),
      paste0("Score: ", SCORE),
      sep = "<br />"
    )
  ) %>%
  select(Longitude, Latitude, SCORE, info)
# Density map of inspection scores: each point in geo_coord is placed by its
# coordinates and supplies its score as z, with the colour range fixed to 0-40.
# The trace and its layout (title, base-map style, zoom, centre, margins) are
# built in a single pipeline.
map_density <- plot_ly(
  data = geo_coord,
  type = "densitymapbox",
  lon = ~Longitude,
  lat = ~Latitude,
  z = ~SCORE,
  zmin = 0,
  zmax = 40,
  radius = 5,
  colorscale = "Viridis",
  hovertext = ~info
) %>%
  layout(
    title = "Density Plot of Inspection Scores of Restaurant in Manhattan",
    margin = list(r = 0, t = 30, b = 0, l = 0),
    mapbox = list(
      style = "carto-positron",
      zoom = 13,
      center = list(lon = -73.9712, lat = 40.7831)
    )
  )

# Print the widget.
map_density